library(MASS)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x dplyr::select() masks MASS::select()
library(faraway)
#Factorize variables
birthwt$race <- as.factor(birthwt$race)
birthwt$smoke <- as.factor(birthwt$smoke)
birthwt$ht <- as.factor(birthwt$ht)
birthwt$ui <- as.factor(birthwt$ui)
birthwt$low <- as.factor(birthwt$low)
str(birthwt)
## 'data.frame': 189 obs. of 10 variables:
## $ low : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ age : int 19 33 20 21 18 21 22 17 29 26 ...
## $ lwt : int 182 155 105 108 107 124 118 103 123 113 ...
## $ race : Factor w/ 3 levels "1","2","3": 2 3 1 1 1 3 1 3 1 1 ...
## $ smoke: Factor w/ 2 levels "0","1": 1 1 2 2 2 1 1 1 2 2 ...
## $ ptl : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ht : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ui : Factor w/ 2 levels "0","1": 2 1 1 2 2 1 1 1 1 1 ...
## $ ftv : int 0 3 1 2 0 0 1 1 1 0 ...
## $ bwt : int 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
#Run the regression, including all covariates
data = lm(bwt ~. - low, data = birthwt)
summary(data)
##
## Call:
## lm(formula = bwt ~ . - low, data = birthwt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1825.26 -435.21 55.91 473.46 1701.20
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2927.962 312.904 9.357 < 2e-16 ***
## age -3.570 9.620 -0.371 0.711012
## lwt 4.354 1.736 2.509 0.013007 *
## race2 -488.428 149.985 -3.257 0.001349 **
## race3 -355.077 114.753 -3.094 0.002290 **
## smoke1 -352.045 106.476 -3.306 0.001142 **
## ptl -48.402 101.972 -0.475 0.635607
## ht1 -592.827 202.321 -2.930 0.003830 **
## ui1 -516.081 138.885 -3.716 0.000271 ***
## ftv -14.058 46.468 -0.303 0.762598
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 650.3 on 179 degrees of freedom
## Multiple R-squared: 0.2427, Adjusted R-squared: 0.2047
## F-statistic: 6.376 on 9 and 179 DF, p-value: 7.891e-08
#Generate diagnostic plots for the model with all covariates
par(mfrow = c(2, 2))
plot(data)
#Boxcox test for the model
boxcox(data)
According to the result of boxcox test, transformation is needed.
data1 = lm(bwt**0.75 ~. ,data = birthwt)
boxcox(data1)
Examine Leverages of the transformed model
hatvalues(data1)
## 85 86 87 88 89 91 92
## 0.10151143 0.05949858 0.02852123 0.07061658 0.06515470 0.02275111 0.02259750
## 93 94 95 96 97 98 99
## 0.03019561 0.02783550 0.02921955 0.02890067 0.02928460 0.13026917 0.08182866
## 100 101 102 103 104 105 106
## 0.03439723 0.03439723 0.07307742 0.04595158 0.05696185 0.02637079 0.04246221
## 107 108 109 111 112 113 114
## 0.09849311 0.06678299 0.03019218 0.06549612 0.03356273 0.03205304 0.02573978
## 115 116 117 118 119 120 121
## 0.07087820 0.06036656 0.06036656 0.04606921 0.11547257 0.02113067 0.05786978
## 123 124 125 126 127 128 129
## 0.03029432 0.03441020 0.02860527 0.07336066 0.04597405 0.07413828 0.05608641
## 130 131 132 133 134 135 136
## 0.04958996 0.03197525 0.07089599 0.07089599 0.06830974 0.02565823 0.02845036
## 137 138 139 140 141 142 143
## 0.05379660 0.12133830 0.02276768 0.02465689 0.04798461 0.02466120 0.03078490
## 144 145 146 147 148 149 150
## 0.07827936 0.03979365 0.02559940 0.02805135 0.02805135 0.02822763 0.02458800
## 151 154 155 156 159 160 161
## 0.02693957 0.12190375 0.08834664 0.02882695 0.27004975 0.13926042 0.07866124
## 162 163 164 166 167 168 169
## 0.10438100 0.06804326 0.04428909 0.06340012 0.03570631 0.10492236 0.01877466
## 170 172 173 174 175 176 177
## 0.08700092 0.06503266 0.04818728 0.02076987 0.04342688 0.03835624 0.02371245
## 179 180 181 182 183 184 185
## 0.02258678 0.04835921 0.02604953 0.02388468 0.06101136 0.02130620 0.02359497
## 186 187 188 189 190 191 192
## 0.02975356 0.15586430 0.21280047 0.03570631 0.02221384 0.02338152 0.03092362
## 193 195 196 197 199 200 201
## 0.03092362 0.02403167 0.02383630 0.11673562 0.04324437 0.02417440 0.02339243
## 202 203 204 205 206 207 208
## 0.15259125 0.03013264 0.03484924 0.03769252 0.11220629 0.04301548 0.02559923
## 209 210 211 212 213 214 215
## 0.03109657 0.07796053 0.03973379 0.02525909 0.04614975 0.02983426 0.02649949
## 216 217 218 219 220 221 222
## 0.03519743 0.02898324 0.03424116 0.02465004 0.02456667 0.02438621 0.03324546
## 223 224 225 226 4 10 11
## 0.06335831 0.02807062 0.02182825 0.11080832 0.07636888 0.08251882 0.16500629
## 13 15 16 17 18 19 20
## 0.12647578 0.06912345 0.05079843 0.06121590 0.07674788 0.10424299 0.10657225
## 22 23 24 25 26 27 28
## 0.06251285 0.10662449 0.03848029 0.04511469 0.04603669 0.05014406 0.12398414
## 29 30 31 32 33 34 35
## 0.05652843 0.03605258 0.06217451 0.11151147 0.05972453 0.06461689 0.04618386
## 36 37 40 42 43 44 45
## 0.04910076 0.07416104 0.08901728 0.05908187 0.10349972 0.07496805 0.04372815
## 46 47 49 50 51 52 54
## 0.04775607 0.03593942 0.04940834 0.07560449 0.06043360 0.10894160 0.04302394
## 56 57 59 60 61 62 63
## 0.05452813 0.06209328 0.07899048 0.06428578 0.07521412 0.06688152 0.03656224
## 65 67 68 69 71 75 76
## 0.05729137 0.03543501 0.07088423 0.04502221 0.07366324 0.11982509 0.06368804
## 77 78 79 81 82 83 84
## 0.07482489 0.06492055 0.05288170 0.06026733 0.05139049 0.12683565 0.14186581
plot(hatvalues(data1), type="h")
Examine internally Studentized residuals of the transformed model
rstandard(data1)
## 85 86 87 88 89
## -1.2249828366 -1.6456905061 -1.8067504409 -0.8736954398 -0.9391407005
## 91 92 93 94 95
## -1.6336263130 -1.9942297593 -1.6141186923 -1.4087024179 -1.4468588966
## 96 97 98 99 100
## -1.3449933858 -1.5004005364 -0.7750809196 -0.4162043504 -1.3395518477
## 101 102 103 104 105
## -1.3395518477 -1.2391079755 -1.1937500490 -0.3697103116 -1.0448202740
## 106 107 108 109 111
## -0.8609883070 -0.4195572479 -1.5287490524 -0.8935130592 -0.0706531661
## 112 113 114 115 116
## -1.4750848658 -1.1131311824 -1.2708798680 -0.4911455771 -0.8887123610
## 117 118 119 120 121
## -0.8887123610 -0.9376241054 -0.2452280710 -1.2512123108 -0.6182075527
## 123 124 125 126 127
## -0.7188702422 -0.9413549307 -0.8533991686 -0.8867547270 -0.4022912245
## 128 129 130 131 132
## -0.3605644987 -1.3228336325 -0.4711192995 -1.1771345458 0.2323138820
## 133 134 135 136 137
## 0.2323138820 -0.7701058593 -0.6089539236 -0.8706843037 0.0072974752
## 138 139 140 141 142
## -0.4671848046 -0.4715230782 -0.4995873724 -0.1488245761 -0.3551312362
## 143 144 145 146 147
## -0.4072364552 1.0267708816 -0.1737837875 -0.2273392727 -0.3006241975
## 148 149 150 151 154
## -0.3006241975 -0.1319403940 -0.0945018024 -0.5516521873 -0.0685705233
## 155 156 159 160 161
## 0.3753011494 0.0002030361 0.1288800053 -0.0908988242 -0.1919574320
## 162 163 164 166 167
## -0.4204612953 0.5434690830 0.4895578076 0.1440538081 -0.1032444065
## 168 169 170 172 173
## -0.1594916814 -0.1987305643 0.2366063829 0.7602856171 -0.3324357423
## 174 175 176 177 179
## -0.1374781931 -0.0244927687 0.7489526816 0.3312524001 0.5396950258
## 180 181 182 183 184
## 0.8736724618 0.5741204000 0.1608586292 0.3383404012 0.2253255668
## 185 186 187 188 189
## 0.2352085118 0.6638278413 0.7136813461 1.2091380220 0.5002725172
## 190 191 192 193 195
## 0.4313144483 0.3662836799 0.5436084319 0.5436084319 0.5533095136
## 196 197 199 200 201
## 0.5738271949 1.1758874436 0.9220688135 0.6433840891 0.9808825183
## 202 203 204 205 206
## 1.3928157546 0.8612693970 0.5372495681 1.0853758622 1.1012937991
## 207 208 209 210 211
## 0.7986336007 1.1937944239 1.3585472034 2.0533318450 1.1263409456
## 212 213 214 215 216
## 1.4959936871 0.7259209447 1.5658498636 1.1272246473 1.4867434618
## 217 218 219 220 221
## 0.9061754624 1.6044518535 1.1975381918 1.2862245225 1.4570188645
## 222 223 224 225 226
## 1.6649137499 1.4181425206 1.9100107218 2.4000177605 3.8487117734
## 4 10 11 13 15
## -3.0711737887 -2.7178669572 -1.9709092641 -1.9679676837 -0.8485293236
## 16 17 18 19 20
## -1.5557813833 -0.6059216149 -1.3416141377 -0.7181919390 -0.7964088226
## 22 23 24 25 26
## -0.7285564421 -0.3465422061 -0.6534449866 -0.8899870421 -0.5472450476
## 27 28 29 30 31
## -0.8487455640 0.0194914672 -0.9691126700 -0.5017602441 0.4704416320
## 32 33 34 35 36
## -0.5454625942 -0.6989650113 0.5052187734 -0.4021094436 -0.6759598910
## 37 40 42 43 44
## 0.7663859257 0.3428895136 0.5863132194 1.0838405877 1.4338196327
## 45 46 47 49 50
## -0.0292466172 0.0853388606 0.1431764100 0.0683379317 0.5424336585
## 51 52 54 56 57
## 0.8407934738 0.1958252015 0.5393048314 0.4555593915 -0.1456222945
## 59 60 61 62 63
## 0.7755656830 0.9549596862 1.1137518007 1.2248904616 0.5965149426
## 65 67 68 69 71
## 0.4273956173 0.4843154892 0.4394189218 0.4100918090 0.6609296023
## 75 76 77 78 79
## 0.9583623332 0.7132860039 0.5078510932 0.8005092208 0.8961047776
## 81 82 83 84
## 0.6941609047 1.2936836709 1.2304267574 1.2324694009
plot(rstandard(data1), type="h")
Examine mean-shift t-statistics of the transformed model
rstudent(data1)
## 85 86 87 88 89
## -1.2267187385 -1.6536900642 -1.8184193289 -0.8731119415 -0.9388277709
## 91 92 93 94 95
## -1.6413820133 -2.0112147291 -1.6214888443 -1.4126363113 -1.4513486251
## 96 97 98 99 100
## -1.3480777182 -1.5057319367 -0.7742082410 -0.4152356891 -1.3425680463
## 101 102 103 104 105
## -1.3425680463 -1.2409862610 -1.1951859422 -0.3688119689 -1.0450908795
## 106 107 108 109 111
## -0.8603597955 -0.4185840813 -1.5345561263 -0.8930045587 -0.0704554106
## 112 113 114 115 116
## -1.4800091774 -1.1138836574 -1.2730940123 -0.4900962111 -0.8881851504
## 117 118 119 120 121
## -0.8881851504 -0.9373041500 -0.2445795766 -1.2532159862 -0.6171314407
## 123 124 125 126 127
## -0.7178909574 -0.9410523250 -0.8527449021 -0.8862199869 -0.4013420937
## 128 129 130 131 132
## -0.3596816258 -1.3256447662 -0.4700872410 -1.1784190093 0.2316955236
## 133 134 135 136 137
## 0.2316955236 -0.7692221158 -0.6078744880 -0.8700899236 0.0072769489
## 138 139 140 141 142
## -0.4661565251 -0.4704906388 -0.4985316981 -0.1484151748 -0.3542577947
## 143 144 145 146 147
## -0.4062802299 1.0269282935 -0.1733096464 -0.2267326987 -0.2998546896
## 148 149 150 151 154
## -0.2998546896 -0.1315756870 -0.0942383381 -0.5505712672 -0.0683785413
## 155 156 159 160 161
## 0.3743936056 0.0002024649 0.1285234696 -0.0906452346 -0.1914372820
## 162 163 164 166 167
## -0.4194869274 0.5423905203 0.4885096944 0.1436569667 -0.1029570684
## 168 169 170 172 173
## -0.1590544055 -0.1981935345 0.2359779337 0.7593809785 -0.3316035749
## 174 175 176 177 179
## -0.1370987533 -0.0244239131 0.7480254725 0.3304224655 0.5386177579
## 180 181 182 183 184
## 0.8730888800 0.5730362410 0.1604178030 0.3374972102 0.2247237900
## 185 186 187 188 189
## 0.2345833407 0.6627814490 0.7126942055 1.2107191961 0.4992163570
## 190 191 192 193 195
## 0.4303261154 0.3653910728 0.5425298239 0.5425298239 0.5522281917
## 196 197 199 200 201
## 0.5727430471 1.1771607671 0.9216789032 0.6423215865 0.9807776043
## 202 203 204 205 206
## 1.3965287335 0.8606418555 0.5361732086 1.0859221533 1.1019565670
## 207 208 209 210 211
## 0.7978177547 1.1952307291 1.3618042352 2.0722449776 1.1271966788
## 212 213 214 215 216
## 1.5012530322 0.7249528600 1.5723117782 1.1280874077 1.4918531962
## 217 218 219 220 221
## 0.9057179958 1.6116349279 1.1990093973 1.2886087379 1.4616627015
## 222 223 224 225 226
## 1.6733105301 1.4222105908 1.9244611451 2.4329556248 4.0082858587
## 4 10 11 13 15
## -3.1470496769 -2.7682704290 -1.9871677306 -1.9841358683 -0.8478589706
## 16 17 18 19 20
## -1.5620619183 -0.6048412824 -1.3446560777 -0.7172116087 -0.7955872905
## 22 23 24 25 26
## -0.7275927004 -0.3456840331 -0.6523898352 -0.8894647653 -0.5461653202
## 27 28 29 30 31
## -0.8480759179 0.0194366595 -0.9689462099 -0.5007030427 0.4694102156
## 32 33 34 35 36
## -0.5443834003 -0.6979573522 0.5041592241 -0.4011605768 -0.6749252681
## 37 40 42 43 44
## 0.7654941199 0.3420379649 0.5852293388 1.0843758943 1.4381153646
## 45 46 47 49 50
## -0.0291644182 0.0851005480 0.1427818846 0.0681465948 0.5413554381
## 51 52 54 56 57
## 0.8400982684 0.1952953939 0.5382277048 0.4545429864 -0.1452213177
## 59 60 61 62 63
## 0.7746940999 0.9547222434 1.1145090523 1.2266254462 0.5954324248
## 65 67 68 69 71
## 0.4264122293 0.4832716596 0.4384207177 0.4091315637 0.6598806441
## 75 76 77 78 79
## 0.9581416568 0.7122982781 0.5067898251 0.7996982207 0.8956065232
## 81 82 83 84
## 0.6931471046 1.2961524370 1.2322170352 1.2342802427
critval <- qt(0.05/(2*nobs(data1)), df=df.residual(data1)-1, lower=FALSE)
which(abs(rstudent(data1)) > critval)
## 226
## 130
Examine Cook’s distances to check influences
cooks.distance(data1)
## 85 86 87 88 89 91
## 1.541238e-02 1.557582e-02 8.712415e-03 5.272775e-03 5.588235e-03 5.648202e-03
## 92 93 94 95 96 97
## 8.358814e-03 7.374591e-03 5.165413e-03 5.728120e-03 4.894315e-03 6.174035e-03
## 98 99 100 101 102 103
## 8.180092e-03 1.403467e-03 5.811006e-03 5.811006e-03 1.100437e-02 6.239706e-03
## 104 105 106 107 108 109
## 7.505597e-04 2.687943e-03 2.988467e-03 1.748343e-03 1.520418e-02 2.259529e-03
## 111 112 113 114 115 116
## 3.180570e-05 6.869498e-03 3.730076e-03 3.879236e-03 1.672892e-03 4.612833e-03
## 117 118 119 120 121 123
## 4.612833e-03 3.859746e-03 7.136992e-04 3.072260e-03 2.134110e-03 1.467674e-03
## 124 125 126 127 128 129
## 2.870838e-03 1.949674e-03 5.659354e-03 7.089914e-04 9.463889e-04 9.452423e-03
## 130 131 132 133 134 135
## 1.052815e-03 4.160898e-03 3.743820e-04 3.743820e-04 3.952931e-03 8.877516e-04
## 136 137 138 139 140 141
## 2.018141e-03 2.752472e-07 2.740065e-03 4.709059e-04 5.736027e-04 1.014880e-04
## 142 143 144 145 146 147
## 2.898970e-04 4.788707e-04 8.139587e-03 1.137824e-04 1.234379e-04 2.371186e-04
## 148 149 150 151 154 155
## 2.371186e-04 4.596981e-05 2.046551e-05 7.659295e-04 5.934132e-05 1.240873e-03
## 156 159 160 161 162 163
## 1.112387e-10 5.586360e-04 1.215290e-04 2.859951e-04 1.873085e-03 1.960408e-03
## 164 166 167 168 169 170
## 1.009684e-03 1.277006e-04 3.588195e-05 2.710758e-04 6.869733e-05 4.849687e-04
## 172 173 174 175 176 177
## 3.655072e-03 5.086320e-04 3.644382e-05 2.475845e-06 2.033939e-03 2.422836e-04
## 179 180 181 182 183 184
## 6.118997e-04 3.526231e-03 8.014496e-04 5.755915e-05 6.761848e-04 1.004818e-04
## 185 186 187 188 189 190
## 1.215354e-04 1.228500e-03 8.549691e-03 3.592914e-02 8.424735e-04 3.842156e-04
## 191 192 193 195 196 197
## 2.920050e-04 8.572588e-04 8.572588e-04 6.853175e-04 7.309469e-04 1.661313e-02
## 199 200 201 202 203 204
## 3.493514e-03 9.322479e-04 2.095060e-03 3.175641e-02 2.095127e-03 9.474530e-04
## 205 206 207 208 209 210
## 4.194778e-03 1.393538e-02 2.606288e-03 3.403734e-03 5.385038e-03 3.240790e-02
## 211 212 213 214 215 216
## 4.772164e-03 5.272242e-03 2.317795e-03 6.854517e-03 3.144340e-03 7.330808e-03
## 217 218 219 220 221 222
## 2.228190e-03 8.297371e-03 3.294907e-03 3.787823e-03 4.823965e-03 8.665765e-03
## 223 224 225 226 4 10
## 1.236736e-02 9.578472e-03 1.168531e-02 1.678089e-01 7.089799e-02 6.039758e-02
## 11 13 15 16 17 18
## 6.978433e-02 5.097716e-02 4.860426e-03 1.177596e-02 2.176400e-03 1.360218e-02
## 19 20 22 23 24 25
## 5.456887e-03 6.878031e-03 3.217642e-03 1.302994e-03 1.553479e-03 3.402056e-03
## 26 27 28 29 30 31
## 1.313843e-03 3.457197e-03 4.888218e-06 5.115569e-03 8.560178e-04 1.333857e-03
## 32 33 34 35 36 37
## 3.394728e-03 2.821083e-03 1.602961e-03 7.117400e-04 2.144879e-03 4.277034e-03
## 40 42 43 44 45 46
## 1.044431e-03 1.962317e-03 1.232897e-02 1.514662e-02 3.555808e-06 3.320331e-05
## 47 49 50 51 52 54
## 6.947315e-05 2.206669e-05 2.187713e-03 4.133681e-03 4.262183e-04 1.188735e-03
## 56 57 59 60 61 62
## 1.088101e-03 1.276286e-04 4.689808e-03 5.695726e-03 9.171542e-03 9.776224e-03
## 63 65 67 68 69 71
## 1.227606e-03 1.009205e-03 7.833667e-04 1.339197e-03 7.207805e-04 3.157909e-03
## 75 76 77 78 79 81
## 1.136699e-02 3.146097e-03 1.896279e-03 4.044582e-03 4.075924e-03 2.809346e-03
## 82 83 84
## 8.242498e-03 1.999237e-02 2.282875e-02
which(cooks.distance(data1) >= 1)
## named integer(0)
plot(cooks.distance(data1), type="h")
Shows the interaction between race and smoking status during pregnancy
lmod.inter <- lm(bwt ~ race * smoke, data = birthwt)
lmod.main <- lm(bwt ~ race + smoke, data = birthwt)
anova(lmod.main, lmod.inter)
## Analysis of Variance Table
##
## Model 1: bwt ~ race + smoke
## Model 2: bwt ~ race * smoke
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 185 87631356
## 2 183 85529548 2 2101808 2.2485 0.1085
Shows the interaction between age and race during pregnancy
lmod.inter1 <- lm(bwt ~ age * race, data = birthwt)
lmod.main1 <- lm(bwt ~ age + race, data = birthwt)
anova(lmod.main1, lmod.inter1)
## Analysis of Variance Table
##
## Model 1: bwt ~ age + race
## Model 2: bwt ~ age * race
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 185 94754346
## 2 183 92431148 2 2323199 2.2998 0.1032
Shows the interaction between number of physician visits during the first trimester and number of previous premature labors during pregnancy
lmod.inter2 <- lm(bwt ~ ftv * ht, data = birthwt)
lmod.main2 <- lm(bwt ~ ftv + ht, data = birthwt)
anova(lmod.main2, lmod.inter2)
## Analysis of Variance Table
##
## Model 1: bwt ~ ftv + ht
## Model 2: bwt ~ ftv * ht
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 186 97610068
## 2 185 97552954 1 57114 0.1083 0.7424
Shows the interaction between mother’s age and mother’s weight in pounds at last menstrual period during pregnancy
lmod.inter3 <- lm(bwt ~ lwt * age, data = birthwt)
lmod.main3 <- lm(bwt ~ lwt + age, data = birthwt)
anova(lmod.main3, lmod.inter3)
## Analysis of Variance Table
##
## Model 1: bwt ~ lwt + age
## Model 2: bwt ~ lwt * age
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 186 96186834
## 2 185 95741853 1 444981 0.8598 0.355
Shows the interaction between smoking status and presence of uterine irritability during pregnancy
lmod.inter4 <- lm(bwt ~ smoke * ui, data = birthwt)
lmod.main4 <- lm(bwt ~ smoke + ui, data = birthwt)
anova(lmod.main4, lmod.inter4)
## Analysis of Variance Table
##
## Model 1: bwt ~ smoke + ui
## Model 2: bwt ~ smoke * ui
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 186 88913988
## 2 185 88471981 1 442007 0.9243 0.3376
Shows the interaction between age and smoking status during pregnancy
lmod.inter5 <- lm(bwt ~ age * smoke, data = birthwt)
lmod.main5 <- lm(bwt ~ age + smoke, data = birthwt)
anova(lmod.main5, lmod.inter5)
## Analysis of Variance Table
##
## Model 1: bwt ~ age + smoke
## Model 2: bwt ~ age * smoke
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 186 95672288
## 2 185 93062605 1 2609683 5.1878 0.02389 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
boxcox(lmod.inter5)
par(mfrow = c(2, 2))
plot(lmod.inter5)
# transform lmod.inter5
lmod.inter5_trans <- lm(bwt^1.25 ~ age * smoke, data = birthwt)
boxcox(lmod.inter5_trans)
par(mfrow = c(2, 2))
plot(lmod.inter5)
# didn't seem to change much from the original plots
Examine Leverages of the transformed model
hatvalues(lmod.inter5_trans)
## 85 86 87 88 89 91
## 0.014443758 0.035590176 0.018179969 0.015549613 0.026666860 0.010422673
## 92 93 94 95 96 97
## 0.009292381 0.020812178 0.033220950 0.018528745 0.014443758 0.014443758
## 98 99 100 101 102 103
## 0.009292381 0.021376048 0.026666860 0.026666860 0.029527931 0.015782131
## 104 105 106 107 108 109
## 0.012139799 0.027248154 0.030265300 0.025527257 0.055085805 0.014834130
## 111 112 113 114 115 116
## 0.009422505 0.014834130 0.032523397 0.017811672 0.018528745 0.020812178
## 117 118 119 120 121 123
## 0.020812178 0.014110911 0.091640993 0.009422505 0.009422505 0.033220950
## 124 125 126 127 128 129
## 0.021885718 0.022350753 0.048392722 0.067866070 0.015549613 0.014443758
## 130 131 132 133 134 135
## 0.008748922 0.010422673 0.026666860 0.026666860 0.030265300 0.014443758
## 136 137 138 139 140 141
## 0.008792297 0.013994652 0.009292381 0.008748922 0.013994652 0.040269139
## 142 143 144 145 146 147
## 0.014443758 0.024876637 0.015549613 0.021376048 0.012139799 0.020812178
## 148 149 150 151 154 155
## 0.020812178 0.008748922 0.008792297 0.014834130 0.018528745 0.012139799
## 156 159 160 161 162 163
## 0.008792297 0.027248154 0.012139799 0.009292381 0.013994652 0.048392722
## 164 166 167 168 169 170
## 0.013515085 0.024876637 0.039455328 0.017334551 0.009422505 0.057591699
## 172 173 174 175 176 177
## 0.018179969 0.008748922 0.009292381 0.030265300 0.021376048 0.012139799
## 179 180 181 182 183 184
## 0.008748922 0.032523397 0.014443758 0.008748922 0.055085805 0.009292381
## 185 186 187 188 189 190
## 0.008792297 0.010422673 0.021885718 0.015782131 0.039455328 0.017811672
## 191 192 193 195 196 197
## 0.017811672 0.021885718 0.021885718 0.021376048 0.008792297 0.021885718
## 199 200 201 202 203 204
## 0.008792297 0.008748922 0.012139799 0.009422505 0.021376048 0.009292381
## 205 206 207 208 209 210
## 0.026666860 0.024876637 0.030265300 0.017334551 0.033220950 0.035590176
## 211 212 213 214 215 216
## 0.018179969 0.014834130 0.034766058 0.014834130 0.009422505 0.024876637
## 217 218 219 220 221 222
## 0.012139799 0.010639546 0.010422673 0.009292381 0.009422505 0.025527257
## 223 224 225 226 4 10
## 0.048000429 0.021885718 0.008792297 0.145261702 0.027248154 0.017811672
## 11 13 15 16 17 18
## 0.079215834 0.009422505 0.009422505 0.012443422 0.008748922 0.008792297
## 19 20 22 23 24 25
## 0.008792297 0.015549613 0.057591699 0.021885718 0.009422505 0.024876637
## 26 27 28 29 30 31
## 0.015782131 0.018179969 0.010422673 0.014110911 0.010422673 0.012139799
## 32 33 34 35 36 37
## 0.009422505 0.014443758 0.021885718 0.018528745 0.008792297 0.032523397
## 40 42 43 44 45 46
## 0.018179969 0.013994652 0.012443422 0.018179969 0.032523397 0.009422505
## 47 49 50 51 52 54
## 0.012139799 0.017334551 0.026666860 0.018179969 0.010422673 0.010639546
## 56 57 59 60 61 62
## 0.048392722 0.029527931 0.013515085 0.018179969 0.014110911 0.029527931
## 63 65 67 68 69 71
## 0.008748922 0.040269139 0.013994652 0.032523397 0.013515085 0.020812178
## 75 76 77 78 79 81
## 0.010639546 0.012139799 0.018528745 0.056545370 0.027248154 0.034766058
## 82 83 84
## 0.013515085 0.020812178 0.015549613
plot(hatvalues(lmod.inter5_trans), type="h")
Examine internally Studentized residuals of the transformed model
rstandard(lmod.inter5_trans)
## 85 86 87 88 89 91
## -0.604869201 -1.169845743 -0.406716082 -0.329039793 -0.399548196 -0.550291522
## 92 93 94 95 96 97
## -0.571141358 -0.362651937 -0.028251578 -0.101968251 -0.326312631 -0.310762472
## 98 99 100 101 102 103
## -0.410902763 -0.753893853 -0.160106347 -0.160106347 -0.078667045 0.038074502
## 104 105 106 107 108 109
## -0.247689132 0.171253509 -0.720637963 -0.676413676 -0.901100982 -0.505991642
## 111 112 113 114 115 116
## -0.358243073 -0.485942978 0.011053129 -0.467158328 0.261371640 0.040540631
## 117 118 119 120 121 123
## 0.040540631 0.250040697 0.552998569 -0.214727444 -0.214727444 0.423633413
## 124 125 126 127 128 129
## 0.164914687 0.290401596 0.520178973 0.619831753 0.309697331 0.161364681
## 130 131 132 133 134 135
## -0.007536536 0.076746797 0.263656173 0.263656173 -0.364359790 0.202154834
## 136 137 138 139 140 141
## -0.008973175 0.404829044 0.089818549 0.053507314 0.466173560 0.701728605
## 142 143 144 145 146 147
## 0.326546188 0.455656564 0.545249580 -0.097174592 0.325095397 0.486143585
## 148 149 150 151 154 155
## 0.486143585 0.240812841 0.198707933 0.033310552 0.758227413 0.429744776
## 156 159 160 161 162 163
## 0.260577790 0.877091130 0.493401190 0.408450025 0.738806436 0.991906069
## 164 166 167 168 169 170
## 0.784928072 0.751688603 0.679275882 0.706061522 0.429243988 1.189041478
## 172 173 174 175 176 177
## 0.879013414 0.577537840 0.621313340 0.221928841 0.412946938 0.747070535
## 179 180 181 182 183 184
## 0.705073116 1.002694407 0.918332145 0.768374554 0.247960981 0.853021862
## 185 186 187 188 189 190
## 0.768586299 0.918369645 1.135176875 1.297085876 1.089475848 0.616877337
## 191 192 193 195 196 197
## 0.616877337 1.168730915 1.168730915 0.648984647 0.941670348 1.329568137
## 199 200 201 202 203 204
## 1.005774638 1.047861888 1.176203158 0.994569469 0.802591372 1.177576457
## 205 206 207 208 209 210
## 1.461746906 1.492783793 0.815744025 1.439546514 1.793841371 0.856489251
## 211 212 213 214 215 216
## 1.636651073 1.103528710 1.712286237 1.146972881 1.291897659 1.706080944
## 217 218 219 220 221 222
## 1.526320846 1.360909158 1.571484281 1.617361234 1.556799947 1.336908846
## 223 224 225 226 4 10
## 1.191918344 2.082018914 2.296281952 2.214276417 -2.490890016 -2.927261576
## 11 13 15 16 17 18
## -1.927483403 -2.393506876 -2.221592911 -2.170019390 -1.997534646 -1.899332761
## 19 20 22 23 24 25
## -1.864186225 -1.404639117 -1.111951954 -1.338303832 -1.698168747 -1.321681841
## 26 27 28 29 30 31
## -1.125252185 -1.254511812 -1.485275172 -1.139449135 -1.430797629 -1.278624318
## 32 33 34 35 36 37
## -1.487487239 -1.202268542 -1.077521227 -0.896604133 -1.385644948 -1.080134626
## 40 42 43 44 45 46
## -0.994153172 -0.859699592 -1.399088691 -0.880474411 -0.945306931 -1.241774707
## 47 49 50 51 52 54
## -1.032574079 -0.893977708 -0.820405639 -0.765697574 -0.991622148 -1.169982978
## 56 57 59 60 61 62
## -0.413186795 -0.674794116 -0.590871866 -0.649853307 -0.546413081 -0.636294419
## 63 65 67 68 69 71
## -0.926891241 -0.358372434 -0.557748848 -0.686360856 -0.512903496 -0.639819509
## 75 76 77 78 79 81
## -1.010378194 -0.747063803 -0.379574075 -0.700251786 -0.329879022 -0.437009026
## 82 83 84
## -0.415142929 -0.560987603 -0.466657862
plot(rstandard(lmod.inter5_trans), type="h")
Examine mean-shift t-statistics of the transformed model
rstudent(lmod.inter5_trans)
## 85 86 87 88 89 91
## -0.603829584 -1.171019065 -0.405796822 -0.328245354 -0.398638905 -0.549251943
## 92 93 94 95 96 97
## -0.570098478 -0.361799093 -0.028175179 -0.101695146 -0.325523204 -0.310002360
## 98 99 100 101 102 103
## -0.409977837 -0.753011129 -0.159684104 -0.159684104 -0.078455456 0.037971607
## 104 105 106 107 108 109
## -0.247059763 0.170803574 -0.719698507 -0.675418782 -0.900640948 -0.504971789
## 111 112 113 114 115 116
## -0.357397525 -0.484937433 0.011023218 -0.466169069 0.260712414 0.040431092
## 117 118 119 120 121 123
## 0.040431092 0.249406142 0.551958339 -0.214173004 -0.214173004 0.422691979
## 124 125 126 127 128 129
## 0.164480458 0.289681697 0.519150981 0.618797127 0.308939272 0.160939296
## 130 131 132 133 134 135
## -0.007516140 0.076540310 0.262992038 0.262992038 -0.363504150 0.201630000
## 136 137 138 139 140 141
## -0.008948892 0.403912376 0.089577421 0.053362916 0.465185229 0.700762719
## 142 143 144 145 146 147
## 0.325756330 0.454678602 0.544211389 -0.096914075 0.324308219 0.485137881
## 148 149 150 151 154 155
## 0.485137881 0.240198763 0.198191308 0.033220501 0.757353070 0.428795811
## 156 159 160 161 162 163
## 0.259920276 0.876541770 0.492389945 0.407528404 0.737896326 0.991862616
## 164 166 167 168 169 170
## 0.784110535 0.750801703 0.678283909 0.705101327 0.428295630 1.190380826
## 172 173 174 175 176 177
## 0.878470898 0.576494748 0.620279330 0.221357689 0.412019287 0.746175083
## 179 180 181 182 183 184
## 0.704111605 1.002709110 0.917941439 0.767520744 0.247331012 0.852391248
## 185 186 187 188 189 190
## 0.767732931 0.917979095 1.136068256 1.299497948 1.090029765 0.615841549
## 191 192 193 195 196 197
## 0.615841549 1.169894814 1.169894814 0.647966275 0.941380670 1.332350701
## 199 200 201 202 203 204
## 1.005806296 1.048141091 1.177430674 0.994540197 0.801816410 1.178815782
## 205 206 207 208 209 210
## 1.466283080 1.497791874 0.815003413 1.443759550 1.804751243 0.855869839
## 211 212 213 214 215 216
## 1.644168106 1.104182339 1.721346798 1.147957636 1.294252667 1.715008769
## 217 218 219 220 221 222
## 1.531865809 1.364071218 1.577797653 1.624510052 1.562857678 1.339778336
## 223 224 225 226 4 10
## 1.193283194 2.101146491 2.323418021 2.238141453 -2.526883453 -2.989391697
## 11 13 15 16 17 18
## -1.941864193 -2.424869769 -2.245739163 -2.192226802 -2.013965939 -1.912935230
## 19 20 22 23 24 25
## -1.876852792 -1.408367851 -1.112667064 -1.341189975 -1.706928926 -1.324372341
## 26 27 28 29 30 31
## -1.126067020 -1.256472483 -1.490166909 -1.140374032 -1.434886573 -1.280835949
## 32 33 34 35 36 37
## -1.492413105 -1.203726489 -1.077993105 -0.896126729 -1.389122150 -1.080624221
## 40 42 43 44 45 46
## -0.994121675 -0.859090702 -1.402743058 -0.879937132 -0.945033745 -1.243607690
## 47 49 50 51 52 54
## -1.032759906 -0.893490297 -0.819677753 -0.764838223 -0.991577190 -1.171157462
## 56 57 59 60 61 62
## -0.412258826 -0.673797608 -0.589829574 -0.648835555 -0.545374549 -0.635267893
## 63 65 67 68 69 71
## -0.926536625 -0.357526670 -0.556707634 -0.685376504 -0.511879467 -0.638795080
## 75 76 77 78 79 81
## -1.010435483 -0.746168339 -0.378694300 -0.699284009 -0.329083050 -0.436051448
## 82 83 84
## -0.414212383 -0.559945836 -0.465669073
critval <- qt(0.05/(2*nobs(lmod.inter5_trans)), df=df.residual(lmod.inter5_trans)-1, lower=FALSE)
which(abs(rstudent(lmod.inter5_trans)) > critval)
## named integer(0)
Examine Cook’s distances to check influences
cooks.distance(lmod.inter5_trans)
## 85 86 87 88 89 91
## 1.340484e-03 1.262600e-02 7.657446e-04 4.275261e-04 1.093424e-03 7.973611e-04
## 92 93 94 95 96 97
## 7.649072e-04 6.988300e-04 6.856622e-06 4.907252e-05 3.901275e-04 3.538311e-04
## 98 99 100 101 102 103
## 3.959129e-04 3.103645e-03 1.755769e-04 1.755769e-04 4.707341e-05 5.811428e-06
## 104 105 106 107 108 109
## 1.884820e-04 2.053780e-04 4.051971e-03 2.996396e-03 1.183407e-02 9.637834e-04
## 111 112 113 114 115 116
## 3.051923e-04 8.889214e-04 1.026753e-06 9.894142e-04 3.224225e-04 8.733182e-06
## 117 118 119 120 121 123
## 8.733182e-06 2.237115e-04 7.712946e-03 1.096461e-04 1.096461e-04 1.541719e-03
## 124 125 126 127 128 129
## 1.521353e-04 4.820001e-04 3.440075e-03 6.992976e-03 3.787396e-04 9.540163e-05
## 130 131 132 133 134 135
## 1.253298e-07 1.550922e-05 4.761308e-04 4.761308e-04 1.035841e-03 1.497294e-04
## 136 137 138 139 140 141
## 1.785541e-07 5.815220e-04 1.891706e-05 6.317383e-06 7.711132e-04 5.165368e-03
## 142 143 144 145 146 147
## 3.906862e-04 1.324181e-03 1.173969e-03 5.156524e-05 3.246965e-04 1.255800e-03
## 148 149 150 151 154 155
## 1.255800e-03 1.279588e-04 8.756047e-05 4.176922e-06 2.713360e-03 5.673842e-04
## 156 159 160 161 162 163
## 1.505749e-04 5.387217e-03 7.479222e-04 3.912005e-04 1.936797e-03 1.250845e-02
## 164 166 167 168 169 170
## 2.110222e-03 3.603695e-03 4.738277e-03 2.198528e-03 4.381536e-04 2.160005e-02
## 172 173 174 175 176 177
## 3.576780e-03 7.359898e-04 9.051965e-04 3.842904e-04 9.311938e-04 1.714665e-03
## 179 180 181 182 183 184
## 1.096931e-03 8.449531e-03 3.089857e-03 1.302737e-03 8.960950e-04 1.706247e-03
## 185 186 187 188 189 190
## 1.309975e-03 2.220774e-03 7.208390e-03 6.744533e-03 1.218887e-02 1.725232e-03
## 191 192 193 195 196 197
## 1.725232e-03 7.640825e-03 7.640825e-03 2.299961e-03 1.966416e-03 9.888545e-03
## 199 200 201 202 203 204
## 2.243257e-03 2.422808e-03 4.250311e-03 2.352275e-03 3.517552e-03 3.251620e-03
## 205 206 207 208 209 210
## 1.463507e-02 1.421235e-02 5.192062e-03 9.138993e-03 2.764349e-02 6.767875e-03
## 211 212 213 214 215 216
## 1.239976e-02 4.584163e-03 2.640071e-02 4.952210e-03 3.968936e-03 1.856399e-02
## 217 218 219 220 221 222
## 7.157275e-03 4.979283e-03 6.502636e-03 6.133884e-03 5.763463e-03 1.170518e-02
## 223 224 225 226 4 10
## 1.790776e-02 2.424826e-02 1.169306e-02 2.083155e-01 4.344944e-02 3.884839e-02
## 11 13 15 16 17 18
## 7.990528e-02 1.362346e-02 1.173672e-02 1.483355e-02 8.804395e-03 7.999812e-03
## 19 20 22 23 24 25
## 7.706484e-03 7.791037e-03 1.889004e-02 1.001891e-02 6.857718e-03 1.114105e-02
## 26 27 28 29 30 31
## 5.075913e-03 7.285356e-03 5.808757e-03 4.645761e-03 5.390460e-03 5.022754e-03
## 32 33 34 35 36 37
## 5.261680e-03 5.295924e-03 6.494756e-03 3.794110e-03 4.257764e-03 9.805082e-03
## 40 42 43 44 45 46
## 4.575177e-03 2.622505e-03 6.166068e-03 3.588680e-03 7.510021e-03 3.666938e-03
## 47 49 50 51 52 54
## 3.275657e-03 3.524523e-03 4.610071e-03 2.714037e-03 2.589178e-03 3.680168e-03
## 56 57 59 60 61 62
## 2.170477e-03 3.463638e-03 1.195790e-03 1.954933e-03 1.068339e-03 3.079684e-03
## 63 65 67 68 69 71
## 1.895695e-03 1.347200e-03 1.103825e-03 3.959136e-03 9.010308e-04 2.175237e-03
## 75 76 77 78 79 81
## 2.744584e-03 1.714634e-03 6.799884e-04 7.347244e-03 7.620517e-04 1.719664e-03
## 82 83 84
## 5.902875e-04 1.672238e-03 8.599297e-04
which(cooks.distance(lmod.inter5_trans) >= 1)
## named integer(0)
plot(cooks.distance(lmod.inter5_trans), type="h")
low variable was removed because this only indicates that the birth weight of the child is less than 2.5kg. It just tells us whether or not the birth weight was low, and does not actually contribute to the actual birth weight.# experimenting with categorical variables separately
lmod.race = lm(bwt ~ race, data = birthwt)
lmod.smoke = lm(bwt ~ smoke, data = birthwt)
lmod.ht = lm(bwt ~ ht, data = birthwt)
lmod.ui = lm(bwt ~ ui, data = birthwt)
anova(lmod.race)
## Analysis of Variance Table
##
## Response: bwt
## Df Sum Sq Mean Sq F value Pr(>F)
## race 2 5015725 2507863 4.9125 0.008336 **
## Residuals 186 94953931 510505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lmod.smoke)
## Analysis of Variance Table
##
## Response: bwt
## Df Sum Sq Mean Sq F value Pr(>F)
## smoke 1 3625946 3625946 7.0378 0.008667 **
## Residuals 187 96343710 515207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lmod.ht)
## Analysis of Variance Table
##
## Response: bwt
## Df Sum Sq Mean Sq F value Pr(>F)
## ht 1 2130425 2130425 4.0719 0.04503 *
## Residuals 187 97839231 523204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lmod.ui)
## Analysis of Variance Table
##
## Response: bwt
## Df Sum Sq Mean Sq F value Pr(>F)
## ui 1 8059031 8059031 16.397 7.518e-05 ***
## Residuals 187 91910625 491501
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
boxcox(lmod.race)
par(mfrow = c(2, 2))
plot(lmod.race)
boxcox(lmod.smoke)
par(mfrow = c(2, 2))
plot(lmod.smoke)
boxcox(lmod.race)
par(mfrow = c(2, 2))
plot(lmod.race)
boxcox(lmod.ht)
par(mfrow = c(2, 2))
plot(lmod.ht)
boxcox(lmod.ui)
par(mfrow = c(2, 2))
plot(lmod.ui)
TukeyHSD(aov(bwt ~ race, data = birthwt))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bwt ~ race, data = birthwt)
##
## $race
## diff lwr upr p adj
## 2-1 -383.02644 -756.2363 -9.816581 0.0428037
## 3-1 -297.43517 -566.1652 -28.705095 0.0260124
## 3-2 85.59127 -304.4521 475.634630 0.8624372
with(birthwt, tapply(bwt, race, mean))
## 1 2 3
## 3102.719 2719.692 2805.284
TukeyHSD(aov(bwt ~ smoke, data = birthwt))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bwt ~ smoke, data = birthwt)
##
## $smoke
## diff lwr upr p adj
## 1-0 -283.7767 -494.7973 -72.75612 0.0086667
with(birthwt, tapply(bwt, smoke, mean))
## 0 1
## 3055.696 2771.919
TukeyHSD(aov(bwt ~ ht, data = birthwt))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bwt ~ ht, data = birthwt)
##
## $ht
## diff lwr upr p adj
## 1-0 -435.3983 -861.0528 -9.743799 0.045032
with(birthwt, tapply(bwt, ht, mean))
## 0 1
## 2972.232 2536.833
TukeyHSD(aov(bwt ~ ui, data = birthwt))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = bwt ~ ui, data = birthwt)
##
## $ui
## diff lwr upr p adj
## 1-0 -581.2733 -864.4574 -298.0892 7.52e-05
with(birthwt, tapply(bwt, ui, mean))
## 0 1
## 3030.702 2449.429
hsb dataset
head(hsb)
## id gender race ses schtyp prog read write math science socst
## 1 70 male white low public general 57 52 41 47 57
## 2 121 female white middle public vocation 68 59 53 63 61
## 3 86 male white high public general 44 33 54 58 31
## 4 141 male white high public vocation 63 44 47 53 56
## 5 172 male white middle public academic 47 52 57 53 61
## 6 113 male white middle public academic 44 52 51 63 61
str(hsb)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ gender : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
## $ ses : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 50 ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
pairs(hsb, col = "dodgerblue")
mod_hsb <- lm(socst ~., data = hsb)
summary(mod_hsb)
##
## Call:
## lm(formula = socst ~ ., data = hsb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.6461 -4.7879 0.8008 4.9662 15.8981
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.73934 5.47198 2.511 0.01290 *
## id 0.03210 0.02056 1.561 0.12019
## gendermale -0.59257 1.22689 -0.483 0.62968
## raceasian -5.80606 3.00871 -1.930 0.05517 .
## racehispanic -0.27575 2.43386 -0.113 0.90992
## racewhite -4.82208 2.51164 -1.920 0.05641 .
## seslow -4.91105 1.64123 -2.992 0.00315 **
## sesmiddle -1.02520 1.31772 -0.778 0.43756
## schtyppublic 3.11868 1.99826 1.561 0.12030
## proggeneral -0.66574 1.50878 -0.441 0.65955
## progvocation -4.16415 1.57032 -2.652 0.00870 **
## read 0.35546 0.08307 4.279 3.01e-05 ***
## write 0.36521 0.08830 4.136 5.35e-05 ***
## math 0.07380 0.09147 0.807 0.42083
## science -0.03770 0.08560 -0.440 0.66011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.583 on 185 degrees of freedom
## Multiple R-squared: 0.5362, Adjusted R-squared: 0.5011
## F-statistic: 15.27 on 14 and 185 DF, p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(mod_hsb)
Based on the scale-location plot, the horizontal line goes off at the end. The non-equally spread points indicates possible violation of homoscedasticity.
vif(mod_hsb) > 5
## id gendermale raceasian racehispanic racewhite seslow
## FALSE FALSE FALSE FALSE FALSE FALSE
## sesmiddle schtyppublic proggeneral progvocation read write
## FALSE FALSE FALSE FALSE FALSE FALSE
## math science
## FALSE FALSE
VIF for all predictor variables are smaller than 5, which indicates there is no multicolinearity issue.
boxcox(mod_hsb) #see if we need transformation
mod_hsb_trans <- lm((socst) ^(3/2) ~., data = hsb)
summary(mod_hsb_trans)
##
## Call:
## lm(formula = (socst)^(3/2) ~ ., data = hsb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -247.267 -55.816 6.701 50.998 173.586
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -29.5501 57.7631 -0.512 0.60956
## id 0.3288 0.2171 1.515 0.13150
## gendermale -5.6820 12.9513 -0.439 0.66138
## raceasian -62.8003 31.7604 -1.977 0.04949 *
## racehispanic -5.1399 25.6922 -0.200 0.84166
## racewhite -50.3229 26.5133 -1.898 0.05925 .
## seslow -51.4834 17.3251 -2.972 0.00336 **
## sesmiddle -12.7946 13.9101 -0.920 0.35887
## schtyppublic 32.5109 21.0939 1.541 0.12497
## proggeneral -8.7874 15.9270 -0.552 0.58180
## progvocation -41.7468 16.5766 -2.518 0.01264 *
## read 3.8658 0.8769 4.409 1.76e-05 ***
## write 3.7858 0.9321 4.062 7.19e-05 ***
## math 0.7996 0.9656 0.828 0.40868
## science -0.2980 0.9036 -0.330 0.74193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 80.05 on 185 degrees of freedom
## Multiple R-squared: 0.5415, Adjusted R-squared: 0.5068
## F-statistic: 15.61 on 14 and 185 DF, p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(mod_hsb_trans)
Based on the Fitted VS Residual plot, the model has a linear relationship and the errors are not correlated. Normal QQ plot looks find which indicates no non-normality error and residual VS leverage plot looks good. Scale-laction plot, especially the red line, still looks kind of off at the end, indicating average magnitude of the standardized residuals is changing much as a function of the fitted values.The sread around the red line vary with the fitted values indicating possible violation of homoscedasticity.
Examine Leverages of the transformed model
hatvalues(mod_hsb_trans)
## 1 2 3 4 5 6 7
## 0.08760006 0.06654660 0.10707364 0.07988727 0.04774977 0.06529547 0.12317064
## 8 9 10 11 12 13 14
## 0.08848586 0.05364112 0.08470966 0.05240767 0.08816233 0.04966763 0.04554697
## 15 16 17 18 19 20 21
## 0.12203656 0.05805764 0.05747295 0.07135441 0.04691042 0.06946711 0.14959859
## 22 23 24 25 26 27 28
## 0.09652573 0.11229457 0.07550299 0.07184258 0.09295706 0.04638736 0.08592565
## 29 30 31 32 33 34 35
## 0.08512408 0.14666987 0.09348278 0.05784220 0.06144604 0.11022622 0.07019808
## 36 37 38 39 40 41 42
## 0.06982927 0.08575335 0.04715196 0.09457706 0.05726386 0.06900212 0.05202528
## 43 44 45 46 47 48 49
## 0.04160831 0.08857416 0.09620138 0.08348230 0.11191593 0.04102732 0.06966431
## 50 51 52 53 54 55 56
## 0.07209880 0.11977638 0.08766678 0.10730625 0.06719178 0.05214204 0.12028562
## 57 58 59 60 61 62 63
## 0.09865139 0.08848403 0.05185511 0.07471770 0.07158932 0.07844548 0.08679082
## 64 65 66 67 68 69 70
## 0.07407182 0.08957437 0.04149122 0.04790914 0.06535677 0.09876185 0.08049563
## 71 72 73 74 75 76 77
## 0.06830948 0.03072163 0.06887855 0.07853779 0.06985366 0.07875184 0.06019573
## 78 79 80 81 82 83 84
## 0.06241211 0.06533582 0.07897227 0.03482619 0.10148211 0.06517921 0.12746560
## 85 86 87 88 89 90 91
## 0.07126038 0.04203767 0.04895278 0.08080355 0.07937829 0.04582378 0.13428799
## 92 93 94 95 96 97 98
## 0.08745787 0.04775667 0.08282564 0.05266734 0.08147288 0.07034849 0.04895356
## 99 100 101 102 103 104 105
## 0.07416944 0.06187093 0.04857761 0.07308365 0.07748209 0.04622431 0.05486199
## 106 107 108 109 110 111 112
## 0.08324026 0.04767739 0.09638832 0.12999437 0.06831722 0.09590640 0.07451838
## 113 114 115 116 117 118 119
## 0.05677262 0.05574984 0.12869202 0.04436096 0.06253553 0.04465007 0.06320879
## 120 121 122 123 124 125 126
## 0.07342890 0.13711394 0.04473522 0.05560759 0.04667800 0.11808045 0.04139177
## 127 128 129 130 131 132 133
## 0.05689552 0.12319817 0.06145225 0.08621516 0.03029201 0.13328840 0.07712424
## 134 135 136 137 138 139 140
## 0.05284149 0.06502459 0.06529938 0.05584540 0.09376271 0.07789263 0.11189291
## 141 142 143 144 145 146 147
## 0.09832406 0.05478070 0.06151496 0.07478608 0.02949791 0.06945005 0.06757228
## 148 149 150 151 152 153 154
## 0.05897733 0.07765690 0.07924103 0.09675952 0.04056916 0.05448368 0.08705666
## 155 156 157 158 159 160 161
## 0.05489755 0.04887893 0.03330984 0.05062614 0.05913023 0.09998584 0.05383809
## 162 163 164 165 166 167 168
## 0.04729133 0.07957406 0.07675927 0.03944349 0.06560886 0.05051502 0.05984618
## 169 170 171 172 173 174 175
## 0.04731782 0.06038065 0.05291020 0.07627798 0.13199544 0.12143042 0.07931966
## 176 177 178 179 180 181 182
## 0.05435546 0.05505856 0.07517485 0.07328618 0.08333103 0.04832971 0.06019467
## 183 184 185 186 187 188 189
## 0.06840350 0.05422377 0.17280982 0.12743313 0.05680168 0.13925401 0.05526570
## 190 191 192 193 194 195 196
## 0.09084360 0.05930646 0.10210514 0.06757852 0.16049308 0.06338056 0.17238198
## 197 198 199 200
## 0.05614329 0.10793944 0.04537972 0.04522051
plot(hatvalues(mod_hsb_trans), type="h")
Examine internally Studentized residuals of the transformed model
rstandard(mod_hsb_trans)
## 1 2 3 4 5 6
## 1.107000432 0.371045020 -1.517239062 0.463396106 0.990416926 1.500687398
## 7 8 9 10 11 12
## 0.664944301 -1.267976453 -0.950450782 -1.192223564 1.646027863 0.446079468
## 13 14 15 16 17 18
## 0.924103545 -1.861930306 0.537003471 1.637409563 0.549219670 -0.061613379
## 19 20 21 22 23 24
## -0.641787714 -0.406465135 -2.101440101 1.001234946 0.752874015 0.279306967
## 25 26 27 28 29 30
## 0.472915955 -1.104909208 0.084937757 -0.421054088 -0.109997030 -0.156937673
## 31 32 33 34 35 36
## 1.129816736 -0.349638957 1.096589276 -1.467905398 0.652378171 0.575627086
## 37 38 39 40 41 42
## 0.799041856 0.540314553 -0.153322827 1.711159690 0.321019807 -0.820032304
## 43 44 45 46 47 48
## -0.763513336 -0.323188446 0.797174763 -1.253519261 -0.142304516 -1.613806192
## 49 50 51 52 53 54
## -1.184378109 -0.977689629 0.278147515 -0.243193452 -0.476286845 1.897854629
## 55 56 57 58 59 60
## 0.049859827 -0.119630494 -0.021018462 0.558012723 -2.386055481 0.313286030
## 61 62 63 64 65 66
## 1.674034110 1.154287026 0.307401467 -0.302642379 1.514184679 -1.891916826
## 67 68 69 70 71 72
## 0.996387361 -1.430938934 -0.847267858 -0.935479687 0.337008461 0.407263585
## 73 74 75 76 77 78
## 0.075245743 0.632765309 0.659347075 -1.611253064 0.065141212 0.584809612
## 79 80 81 82 83 84
## 1.090003172 0.712636521 -0.836019576 -0.548973997 -0.815820381 -0.425396634
## 85 86 87 88 89 90
## -0.462362382 0.784700977 0.734536662 1.821189849 -1.250532745 0.159565683
## 91 92 93 94 95 96
## -1.236542560 -0.944905034 -0.412020226 0.492734776 0.898170456 -0.561177883
## 97 98 99 100 101 102
## -1.269390450 1.113173174 0.509345714 -0.342309944 0.435112087 0.906381074
## 103 104 105 106 107 108
## -1.165245406 -1.235271222 -1.012566706 0.007000562 -0.923294914 0.214262235
## 109 110 111 112 113 114
## 1.113223638 0.783014341 0.481896490 0.467611663 0.436837740 -0.783429461
## 115 116 117 118 119 120
## 0.232443450 0.086406526 -0.940077593 -0.193988931 0.175640124 1.109867824
## 121 122 123 124 125 126
## -0.185572302 0.605407285 0.441012158 -0.241527750 -0.724649954 0.228397303
## 127 128 129 130 131 132
## 0.943096602 -0.599695554 -0.495083916 -1.045927743 -0.392175491 0.156023620
## 133 134 135 136 137 138
## -1.080088166 -0.002035560 1.607997314 0.040970115 -1.141281063 0.672905647
## 139 140 141 142 143 144
## -0.016469461 1.167670596 1.535960265 0.011134988 0.628872021 -1.959022809
## 145 146 147 148 149 150
## 1.284006522 1.228408005 -2.551063985 0.113395935 -0.618587987 -1.449067177
## 151 152 153 154 155 156
## 0.035940372 -0.667565926 0.277919711 1.142463184 1.274928012 1.205105922
## 157 158 159 160 161 162
## 1.385249857 1.599191486 -0.456524000 -0.313238059 1.077942453 0.551784999
## 163 164 165 166 167 168
## -3.219641871 0.362269715 0.680342845 -0.855932813 0.569604905 -1.098272409
## 169 170 171 172 173 174
## -2.496010708 -1.792057999 -0.625964944 2.025696084 -1.211950882 -0.949016073
## 175 176 177 178 179 180
## 0.381867299 1.079884602 1.286365684 -1.111275870 -1.619328460 -0.355667462
## 181 182 183 184 185 186
## -1.185243229 0.142494857 0.269815767 -0.048887399 0.413188487 2.283733859
## 187 188 189 190 191 192
## -0.674494781 -1.079016150 -0.156480564 2.274219364 -0.318386627 -1.054654210
## 193 194 195 196 197 198
## 0.994000249 0.120991172 -0.102920319 0.793101249 0.129132679 0.134712098
## 199 200
## 0.385286606 -0.601720723
plot(rstandard(mod_hsb_trans), type="h")
Examine mean-shift t-statistics of the transformed model
rstudent(mod_hsb_trans)
## 1 2 3 4 5 6
## 1.107679244 0.370178603 -1.522635837 0.462410433 0.990365594 1.505819459
## 7 8 9 10 11 12
## 0.663938602 -1.270075761 -0.950201275 -1.193591129 1.653727567 0.445111661
## 13 14 15 16 17 18
## 0.923737053 -1.874538145 0.535968032 1.644941283 0.548180367 -0.061447262
## 19 20 21 22 23 24
## -0.640764512 -0.405546218 -2.121223126 1.001241670 0.751989353 0.278609810
## 25 26 27 28 29 30
## 0.471921415 -1.105572824 0.084709536 -0.420115910 -0.109702926 -0.156523362
## 31 32 33 34 35 36
## 1.130666548 -0.348807971 1.097193221 -1.472533329 0.651362267 0.574584018
## 37 38 39 40 41 42
## 0.798258013 0.539277937 -0.152917596 1.720196072 0.320240217 -0.819303388
## 43 44 45 46 47 48
## -0.762649529 -0.322404808 0.796386315 -1.255469861 -0.141927156 -1.620888254
## 49 50 51 52 53 54
## -1.185676464 -0.977572426 0.277452767 -0.242574059 -0.475289331 1.911416941
## 55 56 57 58 59 60
## 0.049725222 -0.119311345 -0.020961604 0.556971460 -2.417080657 0.312521076
## 61 62 63 64 65 66
## 1.682293925 1.155330996 0.306647853 -0.301898061 1.519532085 -1.905318529
## 67 68 69 70 71 72
## 0.996367833 -1.435029867 -0.846619023 -0.935162402 0.336199609 0.406343577
## 73 74 75 76 77 78
## 0.075043249 0.631736814 0.658336621 -1.618287394 0.064965661 0.583766747
## 79 80 81 82 83 84
## 1.090560766 0.711685376 -0.835336443 -0.547934758 -0.815079976 -0.424453001
## 85 86 87 88 89 90
## -0.461377713 0.783882918 0.733619303 1.832764510 -1.252453166 0.159144792
## 91 92 93 94 95 96
## -1.238324058 -0.944630016 -0.411093807 0.491724022 0.897699069 -0.560136086
## 97 98 99 100 101 102
## -1.271504537 1.113897305 0.508323786 -0.341491693 0.434156722 0.905941815
## 103 104 105 106 107 108
## -1.166379999 -1.237040295 -1.012636303 0.006981617 -0.922924999 0.213708881
## 109 110 111 112 113 114
## 1.113948142 0.782192432 0.480894223 0.466621979 0.435880361 -0.782608497
## 115 116 117 118 119 120
## 0.231848232 0.086174418 -0.939780756 -0.193483606 0.175179384 1.110567601
## 121 122 123 124 125 126
## -0.185087303 0.604367809 0.440049993 -0.240912074 -0.723716642 0.227811297
## 127 128 129 130 131 132
## 0.942813368 -0.598654726 -0.494071446 -1.046194911 -0.391276801 0.155611602
## 133 134 135 136 137 138
## -1.080577445 -0.002030051 1.614971032 0.040859421 -1.142220437 0.671907295
## 139 140 141 142 143 144
## -0.016424900 1.168825556 1.541664815 0.011104856 0.627841501 -1.974306500
## 145 146 147 148 149 150
## 1.286275840 1.230110568 -2.590125436 0.113092974 -0.617552861 -1.453417334
## 151 152 153 154 155 156
## 0.035843229 -0.666562570 0.277225438 1.143411933 1.277100380 1.206589760
## 157 158 159 160 161 162
## 1.388721915 1.606002696 -0.455545152 -0.312473196 1.078417178 0.550745052
## 163 164 165 166 167 168
## -3.304853359 0.361417499 0.679351984 -0.855311592 0.568562132 -1.098888324
## 169 170 171 172 173 174
## -2.532258980 -1.802925312 -0.624933012 2.042998517 -1.213497841 -0.948759920
## 175 176 177 178 179 180
## 0.380984008 1.080372497 1.288660481 -1.111985997 -1.626514365 -0.354826228
## 181 182 183 184 185 186
## -1.186549157 0.142117013 0.269138508 -0.048755407 0.412260516 2.310352287
## 187 188 189 190 191 192
## -0.673497979 -1.079498148 -0.156067399 2.300449460 -0.317611986 -1.054976186
## 193 194 195 196 197 198
## 0.993967936 0.120668500 -0.102644718 0.792302910 0.128789003 0.134354108
## 199 200
## 0.384398133 -0.600680337
critval <- qt(0.05/(2*nobs(mod_hsb_trans)), df=df.residual(mod_hsb_trans)-1, lower=FALSE)
which(abs(rstudent(mod_hsb_trans)) > critval)
## named integer(0)
Examine Cook’s distances to check influences
cooks.distance(mod_hsb_trans)
## 1 2 3 4 5 6
## 7.843746e-03 6.543275e-04 1.840279e-02 1.242940e-03 3.279178e-03 1.048813e-02
## 7 8 9 10 11 12
## 4.140677e-03 1.040499e-02 3.413579e-03 8.769972e-03 9.989792e-03 1.282622e-03
## 13 14 15 16 17 18
## 2.975417e-03 1.102911e-02 2.672246e-03 1.101688e-02 1.226226e-03 1.944597e-05
## 19 20 21 22 23 24
## 1.351535e-03 8.222480e-04 5.179005e-02 7.140161e-03 4.780169e-03 4.247476e-04
## 25 26 27 28 29 30
## 1.154083e-03 8.340968e-03 2.339580e-05 1.111030e-03 7.505176e-05 2.822195e-04
## 31 32 33 34 35 36
## 8.775671e-03 5.003450e-04 5.248454e-03 1.779550e-02 2.142113e-03 1.658311e-03
## 37 38 39 40 41 42
## 3.992413e-03 9.631150e-04 1.637030e-04 1.185715e-02 5.091973e-04 2.460302e-03
## 43 44 45 46 47 48
## 1.687249e-03 6.767155e-04 4.509469e-03 9.541669e-03 1.701312e-04 7.428110e-03
## 49 50 51 52 53 54
## 7.002613e-03 4.951503e-03 7.018386e-04 3.788733e-04 1.817893e-03 1.729651e-02
## 55 56 57 58 59 60
## 9.117065e-06 1.304562e-04 3.223451e-06 2.015105e-03 2.075806e-02 5.283723e-04
## 61 62 63 64 65 66
## 1.440607e-02 7.561071e-03 5.987205e-04 4.884768e-04 1.503854e-02 1.032935e-02
## 67 68 69 70 71 72
## 3.330467e-03 9.545433e-03 5.244449e-03 5.107354e-03 5.551363e-04 3.504738e-04
## 73 74 75 76 77 78
## 2.792224e-05 2.275072e-03 2.176581e-03 1.479518e-02 1.811960e-05 1.517730e-03
## 79 80 81 82 83 84
## 5.536815e-03 2.902998e-03 1.681288e-03 2.269212e-03 3.093702e-03 1.762411e-03
## 85 86 87 88 89 90
## 1.093523e-03 1.801388e-03 1.851446e-03 1.943754e-02 8.989166e-03 8.151734e-05
## 91 92 93 94 95 96
## 1.581214e-02 5.704677e-03 5.675863e-04 1.461666e-03 2.989958e-03 1.862220e-03
## 97 98 99 100 101 102
## 8.128938e-03 4.252230e-03 1.385567e-03 5.151952e-04 6.444271e-04 4.318273e-03
## 103 104 105 106 107 108
## 7.602739e-03 4.930121e-03 3.967641e-03 2.966555e-07 2.845234e-03 3.264695e-04
## 109 110 111 112 113 114
## 1.234457e-02 2.997162e-03 1.642292e-03 1.173748e-03 7.657229e-04 2.415823e-03
## 115 116 117 118 119 120
## 5.320143e-04 2.310516e-05 3.930141e-03 1.172525e-04 1.387685e-04 6.507880e-03
## 121 122 123 124 125 126
## 3.648070e-04 1.144274e-03 7.634691e-04 1.904212e-04 4.687209e-03 1.501632e-04
## 127 128 129 130 131 132
## 3.577168e-03 3.368784e-03 1.069911e-03 6.881003e-03 3.202999e-04 2.495785e-04
## 133 134 135 136 137 138
## 6.499424e-03 1.541093e-08 1.198828e-02 7.817712e-06 5.136156e-03 3.123240e-03
## 139 140 141 142 143 144
## 1.527504e-06 1.145214e-02 1.715055e-02 4.790526e-07 1.728171e-03 2.068075e-02
## 145 146 147 148 149 150
## 3.340704e-03 7.508045e-03 3.144162e-02 5.372652e-05 2.147827e-03 1.204731e-02
## 151 152 153 154 155 156
## 9.224953e-06 1.256260e-03 2.967186e-04 8.297578e-03 6.294398e-03 4.975596e-03
## 157 158 159 160 161 162
## 4.408087e-03 9.091745e-03 8.732046e-04 7.266862e-04 4.407822e-03 1.007558e-03
## 163 164 165 166 167 168
## 5.974569e-02 7.274258e-04 1.267117e-03 3.429429e-03 1.150770e-03 5.118780e-03
## 169 170 171 172 173 174
## 2.062901e-02 1.375811e-02 1.459342e-03 2.258995e-02 1.489072e-02 8.298646e-03
## 175 176 177 178 179 180
## 8.375401e-04 4.468674e-03 6.427730e-03 6.692146e-03 1.382468e-02 7.666395e-04
## 181 182 183 184 185 186
## 4.756105e-03 8.670163e-05 3.563640e-04 9.134903e-06 2.377761e-03 5.077891e-02
## 187 188 189 190 191 192
## 1.826519e-03 1.255733e-02 9.549389e-05 3.445317e-02 4.260614e-04 8.432397e-03
## 193 194 195 196 197 198
## 4.773953e-03 1.865727e-04 4.778635e-05 8.734297e-03 6.612609e-05 1.463888e-04
## 199 200
## 4.704438e-04 1.143223e-03
which(cooks.distance(mod_hsb_trans) >= 1)
## named integer(0)
plot(cooks.distance(mod_hsb_trans), type="h")
Interaction
?hsb
mod_int1 <- lm(socst ~ gender * race, data = hsb)
mod_inta <- lm(socst ~ gender + race, data = hsb)
anova(mod_int1, mod_inta)[2,4] > 0.05
## [1] FALSE
mod_int2 <- lm(socst ~ gender * ses, data =hsb)
mod_intb <- lm(socst ~ gender + ses, data = hsb)
anova(mod_int2, mod_intb)[2,4] > 0.05
## [1] FALSE
mod_int3 <- lm(socst ~ ses * race, data =hsb)
mod_intc <- lm(socst ~ ses + race, data = hsb)
anova(mod_int3, mod_intc)[2,4] > 0.05
## [1] FALSE
mod_int4 <- lm(socst ~ schtyp * race, data =hsb)
mod_intd <- lm(socst ~ schtyp + race, data = hsb)
anova(mod_int4, mod_intd)[2,4] > 0.05
## [1] FALSE
mod_int5 <- lm(socst ~ schtyp * prog, data =hsb)
mod_inte <- lm(socst ~ schtyp + prog, data = hsb)
anova(mod_int5, mod_inte)[2,4] > 0.05
## [1] FALSE
mod_int6 <- lm(socst ~ ses * prog, data =hsb)
mod_intf <- lm(socst ~ ses + prog, data = hsb)
anova(mod_int6, mod_intf)[2,4] > 0.05
## [1] FALSE
No interaction effect between the varibles above as all the p values are not significant.
Use Generalzied Square Method to fix the error issue.
mod_hsb_glm <- glm((socst) ^ 3/2 ~ ., data = hsb)
summary(mod_hsb_glm)
##
## Call:
## glm(formula = (socst)^3/2 ~ ., data = hsb)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -78673 -21777 975 19273 80918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -79314.46 22067.80 -3.594 0.000417 ***
## id 111.62 82.92 1.346 0.179949
## gendermale -1561.40 4947.90 -0.316 0.752686
## raceasian -24133.08 12133.74 -1.989 0.048184 *
## racehispanic -3566.47 9815.42 -0.363 0.716756
## racewhite -17852.81 10129.13 -1.763 0.079633 .
## seslow -18517.10 6618.87 -2.798 0.005693 **
## sesmiddle -6707.77 5314.21 -1.262 0.208454
## schtyppublic 11462.67 8058.72 1.422 0.156596
## proggeneral -4774.20 6084.73 -0.785 0.433680
## progvocation -13202.52 6332.90 -2.085 0.038465 *
## read 1545.92 335.00 4.615 7.34e-06 ***
## write 1324.04 356.09 3.718 0.000266 ***
## math 317.97 368.89 0.862 0.389822
## science -15.96 345.20 -0.046 0.963176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 935287758)
##
## Null deviance: 3.7569e+11 on 199 degrees of freedom
## Residual deviance: 1.7303e+11 on 185 degrees of freedom
## AIC: 4715.3
##
## Number of Fisher Scoring iterations: 2
par(mfrow = c(2, 2))
plot(mod_hsb_glm)
library(MASS)
#Huber’s method
ghuber_hsb <- rlm((socst) ^ 3/2 ~.,data = hsb)
par(mfrow = c(2, 2))
plot(ghuber_hsb)
None of the remedies fix the scale-location plot.
Just Experiments
one categorical variable
library(tidyverse)
library(MASS)
str(denim)
## 'data.frame': 95 obs. of 2 variables:
## $ waste : num 1.2 16.4 12.1 11.5 24 10.1 -6 9.7 10.2 -3.7 ...
## $ supplier: Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
## - attr(*, "na.action")= 'omit' Named int [1:15] 70 75 80 85 90 95 98 99 100 103 ...
## ..- attr(*, "names")= chr [1:15] "70" "75" "80" "85" ...
ggplot(denim, aes(x = denim$supplier)) + geom_bar() + labs(y = "denim$waste")
par(mfrow = c(2, 2))
plot(denim$supplier, denim$waste)
mod_denim1 <- lm(waste ~ supplier, data = denim)
summary(mod_denim1)
##
## Call:
## lm(formula = waste ~ supplier, data = denim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.432 -4.377 -1.323 2.639 61.368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5227 2.1021 2.152 0.0341 *
## supplier2 4.3091 2.9728 1.450 0.1507
## supplier3 0.3089 3.0879 0.100 0.9206
## supplier4 2.9667 3.0879 0.961 0.3392
## supplier5 5.8542 3.4491 1.697 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.86 on 90 degrees of freedom
## Multiple R-squared: 0.04901, Adjusted R-squared: 0.006747
## F-statistic: 1.16 on 4 and 90 DF, p-value: 0.334
par(mfrow = c(2, 2))
plot(mod_denim1)
stripchart(denim$waste ~ denim$supplier)
TukeyHSD(aov(mod_denim1))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = mod_denim1)
##
## $supplier
## diff lwr upr p adj
## 2-1 4.3090909 -3.966713 12.584895 0.5976181
## 3-1 0.3088517 -8.287424 8.905127 0.9999769
## 4-1 2.9667464 -5.629529 11.563022 0.8718682
## 5-1 5.8541958 -3.747712 15.456104 0.4408168
## 3-2 -4.0002392 -12.596515 4.596036 0.6946720
## 4-2 -1.3423445 -9.938620 7.253931 0.9924515
## 5-2 1.5451049 -8.056803 11.147013 0.9915352
## 4-3 2.6578947 -6.247327 11.563116 0.9203538
## 5-3 5.5453441 -4.334112 15.424800 0.5251000
## 5-4 2.8874494 -6.992007 12.766906 0.9258057
min(denim$waste)
## [1] -11.6
boxcox(waste + 12 ~ supplier, data = denim)
mod_denim_trans = lm((waste + 12) ^ 1/2 ~., data = denim)
summary(mod_denim_trans)
##
## Call:
## lm(formula = (waste + 12)^1/2 ~ ., data = denim)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2159 -2.1886 -0.6614 1.3197 30.6841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.2614 1.0510 7.860 7.91e-12 ***
## supplier2 2.1545 1.4864 1.450 0.1507
## supplier3 0.1544 1.5440 0.100 0.9206
## supplier4 1.4834 1.5440 0.961 0.3392
## supplier5 2.9271 1.7246 1.697 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.93 on 90 degrees of freedom
## Multiple R-squared: 0.04901, Adjusted R-squared: 0.006747
## F-statistic: 1.16 on 4 and 90 DF, p-value: 0.334
TukeyHSD(aov(mod_denim_trans))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = mod_denim_trans)
##
## $supplier
## diff lwr upr p adj
## 2-1 2.1545455 -1.983356 6.292447 0.5976181
## 3-1 0.1544258 -4.143712 4.452564 0.9999769
## 4-1 1.4833732 -2.814765 5.781511 0.8718682
## 5-1 2.9270979 -1.873856 7.728052 0.4408168
## 3-2 -2.0001196 -6.298257 2.298018 0.6946720
## 4-2 -0.6711722 -4.969310 3.626965 0.9924515
## 5-2 0.7725524 -4.028402 5.573506 0.9915352
## 4-3 1.3289474 -3.123663 5.781558 0.9203538
## 5-3 2.7726721 -2.167056 7.712400 0.5251000
## 5-4 1.4437247 -3.496003 6.383453 0.9258057
par(mfrow = c(2, 2))
plot(mod_denim_trans) #plot[4] worth paying attention
mod_bf <- lm(Butterfat ~ ., data = butterfat)
mod_bf_int <- lm(Butterfat ~ Breed * Age, data = butterfat)
interaction.plot(butterfat$Breed,butterfat$Age, butterfat$Butterfat)
summary(mod_bf)
##
## Call:
## lm(formula = Butterfat ~ ., data = butterfat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0202 -0.2373 -0.0640 0.2617 1.2098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.00770 0.10135 39.541 < 2e-16 ***
## BreedCanadian 0.37850 0.13085 2.893 0.00475 **
## BreedGuernsey 0.89000 0.13085 6.802 9.48e-10 ***
## BreedHolstein-Fresian -0.39050 0.13085 -2.984 0.00362 **
## BreedJersey 1.23250 0.13085 9.419 3.16e-15 ***
## AgeMature 0.10460 0.08276 1.264 0.20937
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4138 on 94 degrees of freedom
## Multiple R-squared: 0.6825, Adjusted R-squared: 0.6656
## F-statistic: 40.41 on 5 and 94 DF, p-value: < 2.2e-16
summary(mod_bf_int)
##
## Call:
## lm(formula = Butterfat ~ Breed * Age, data = butterfat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0190 -0.2720 -0.0430 0.2372 1.3170
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.9660 0.1316 30.143 < 2e-16 ***
## BreedCanadian 0.5220 0.1861 2.805 0.00616 **
## BreedGuernsey 0.9330 0.1861 5.014 2.65e-06 ***
## BreedHolstein-Fresian -0.3030 0.1861 -1.628 0.10693
## BreedJersey 1.1670 0.1861 6.272 1.22e-08 ***
## AgeMature 0.1880 0.1861 1.010 0.31503
## BreedCanadian:AgeMature -0.2870 0.2631 -1.091 0.27834
## BreedGuernsey:AgeMature -0.0860 0.2631 -0.327 0.74457
## BreedHolstein-Fresian:AgeMature -0.1750 0.2631 -0.665 0.50773
## BreedJersey:AgeMature 0.1310 0.2631 0.498 0.61982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4161 on 90 degrees of freedom
## Multiple R-squared: 0.6926, Adjusted R-squared: 0.6619
## F-statistic: 22.53 on 9 and 90 DF, p-value: < 2.2e-16
anova(mod_bf, mod_bf_int)
## Analysis of Variance Table
##
## Model 1: Butterfat ~ Breed + Age
## Model 2: Butterfat ~ Breed * Age
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 94 16.094
## 2 90 15.580 4 0.51387 0.7421 0.5658
anova(mod_bf, mod_bf_int)[2,6] > 0.05
## [1] TRUE
par(mfrow = c(2, 2))
plot(mod_bf)
boxcox(mod_bf)
# transform mod_bf
mod_bf_trans <- lm((Butterfat) ^ (-1.4) ~., data = butterfat)
boxcox(mod_bf_trans)
par(mfrow = c(2, 2))
plot(mod_bf_trans)
Examine Leverages of the transformed model
hatvalues(mod_bf_trans)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
## 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06 0.06
## 97 98 99 100
## 0.06 0.06 0.06 0.06
plot(hatvalues(mod_bf_trans), type="h")
## Warning in plot.window(...): relative range of values ( 0 * EPS) is small (axis
## 2)
Examine internally Studentized residuals of the transformed model
rstandard(mod_bf_trans)
## 1 2 3 4 5 6
## 1.302175124 -0.038684857 1.176756932 0.843010892 -0.059132740 -0.214583261
## 7 8 9 10 11 12
## -0.607305220 0.216601983 -0.092888120 -0.838010387 -1.105496473 -1.198459522
## 13 14 15 16 17 18
## -0.545540009 1.137560855 0.008974190 0.367430174 -1.020923831 -0.385358497
## 19 20 21 22 23 24
## -0.905989144 1.959861910 1.738937095 -1.495282498 -0.033796346 0.224513753
## 25 26 27 28 29 30
## 1.198241419 0.803309241 0.219953303 1.224316878 -0.006208173 -1.705575476
## 31 32 33 34 35 36
## 0.486532361 -2.078760161 0.077454061 1.152041151 -0.430498446 0.194061984
## 37 38 39 40 41 42
## -0.982070298 -0.823597445 0.162487176 0.073940420 1.007682878 -0.733936195
## 43 44 45 46 47 48
## -1.413229616 -0.454474406 0.749448915 0.093751336 0.552280746 2.829679897
## 49 50 51 52 53 54
## -0.630240456 0.406924472 -0.666971410 0.822372580 0.876878115 -0.454474406
## 55 56 57 58 59 60
## 0.293918731 0.688775444 -0.015161843 -1.103095821 -0.828222845 -2.021906115
## 61 62 63 64 65 66
## 1.365837893 0.317634100 -0.612931689 -1.366439777 -2.623365192 -0.365173950
## 67 68 69 70 71 72
## 1.915781005 -1.291943155 0.468040966 0.365631301 -0.451440051 -0.189649999
## 73 74 75 76 77 78
## 0.468040966 1.180235981 -0.115875664 -1.329304913 0.421319025 0.317634100
## 79 80 81 82 83 84
## 0.610104477 0.915864576 1.023007453 -1.963042444 0.218568783 1.509555514
## 85 86 87 88 89 90
## 0.104266735 -0.969700347 -0.203148991 0.800634831 0.218568783 -0.169371739
## 91 92 93 94 95 96
## -0.753195015 0.004082009 0.085520888 0.824236537 0.218568783 2.301425234
## 97 98 99 100
## -1.083112579 0.616015843 -1.782602653 -1.000277627
plot(rstandard(mod_bf_trans), type="h")
Examine mean-shift t-statistics of the transformed model
rstudent(mod_bf_trans)
## 1 2 3 4 5 6
## 1.307072894 -0.038478843 1.179198734 0.841702597 -0.058818457 -0.213491104
## 7 8 9 10 11 12
## -0.605254796 0.215500551 -0.092396954 -0.836672158 -1.106819034 -1.201280711
## 13 14 15 16 17 18
## -0.543491500 1.139363484 0.008926331 0.365733264 -1.021156008 -0.383606367
## 19 20 21 22 23 24
## -0.905117616 1.990501664 1.758173355 -1.505317976 -0.033616302 0.223376237
## 25 26 27 28 29 30
## 1.201058704 0.801781731 0.218836532 1.227614427 -0.006175064 -1.723354620
## 31 32 33 34 35 36
## 0.484547989 -2.116903550 0.077043429 1.154073108 -0.428625181 0.193065658
## 37 38 39 40 41 42
## -0.981882713 -0.822176717 0.161643274 0.073548207 1.007766454 -0.732122551
## 43 44 45 46 47 48
## -1.420867970 -0.452547992 0.747688987 0.093255685 0.550228652 2.942709942
## 49 50 51 52 53 54
## -0.628207821 0.405111166 -0.664989587 0.820945097 0.875790716 -0.452547992
## 55 56 57 58 59 60
## 0.292485584 0.686837352 -0.015080997 -1.104383954 -0.826827985 -2.056335073
## 61 62 63 64 65 66
## 1.372238251 0.316109727 -0.610884666 -1.372855207 -2.710481140 -0.363484260
## 67 68 69 70 71 72
## 1.943890751 -1.296616027 0.466088144 0.363940142 -0.449519912 -0.188674625
## 73 74 75 76 77 78
## 0.466088144 1.182737362 -0.115265888 -1.334821023 0.419468223 0.316109727
## 79 80 81 82 83 84
## 0.608055667 0.915071896 1.023263544 -1.993869890 0.217458340 1.520042040
## 85 86 87 88 89 90
## 0.103716640 -0.969389351 -0.202109894 0.799094049 0.217458340 -0.168494129
## 91 92 93 94 95 96
## -0.751448926 0.004060238 0.085068083 0.822819349 0.217458340 2.356503608
## 97 98 99 100
## -1.084122174 0.613970948 -1.803847041 -1.000280614
critval <- qt(0.05/(2*nobs(mod_bf_trans)), df=df.residual(mod_bf_trans)-1, lower=FALSE)
which(abs(rstudent(mod_bf_trans)) > critval)
## named integer(0)
Examine Cook’s distances to check influences
cooks.distance(mod_bf_trans)
## 1 2 3 4 5 6
## 1.803894e-02 1.592041e-05 1.473146e-02 7.560291e-03 3.719873e-05 4.898508e-04
## 7 8 9 10 11 12
## 3.923613e-03 4.991108e-04 9.178939e-05 7.470866e-03 1.300130e-02 1.527984e-02
## 13 14 15 16 17 18
## 3.166105e-03 1.376643e-02 8.567668e-07 1.436223e-03 1.108814e-02 1.579800e-03
## 19 20 21 22 23 24
## 8.732089e-03 4.086233e-02 3.216917e-02 2.378585e-02 1.215099e-05 5.362386e-04
## 25 26 27 28 29 30
## 1.527428e-02 6.864955e-03 5.146751e-04 1.594630e-02 4.100150e-07 3.094668e-02
## 31 32 33 34 35 36
## 2.518231e-03 4.597068e-02 6.382055e-05 1.411914e-02 1.971584e-03 4.006389e-04
## 37 38 39 40 41 42
## 1.026023e-02 7.216093e-03 2.808732e-04 5.816155e-05 1.080239e-02 5.730450e-03
## 43 44 45 46 47 48
## 2.124700e-02 2.197308e-03 5.975252e-03 9.350333e-05 3.244830e-03 8.518179e-02
## 49 50 51 52 53 54
## 4.225564e-03 1.761569e-03 4.732456e-03 7.194645e-03 8.179949e-03 2.197308e-03
## 55 56 57 58 59 60
## 9.190236e-04 5.046932e-03 2.445548e-06 1.294490e-02 7.297373e-03 4.349047e-02
## 61 62 63 64 65 66
## 1.984588e-02 1.073313e-03 3.996652e-03 1.986338e-02 7.321324e-02 1.418638e-03
## 67 68 69 70 71 72
## 3.904486e-02 1.775657e-02 2.330450e-03 1.422194e-03 2.168065e-03 3.826290e-04
## 73 74 75 76 77 78
## 2.330450e-03 1.481869e-02 1.428422e-04 1.879842e-02 1.888401e-03 1.073313e-03
## 79 80 81 82 83 84
## 3.959867e-03 8.923489e-03 1.113345e-02 4.099506e-02 5.082161e-04 2.424210e-02
## 85 86 87 88 89 90
## 1.156548e-04 1.000339e-02 4.390374e-04 6.819321e-03 5.082161e-04 3.051786e-04
## 91 92 93 94 95 96
## 6.035135e-03 1.772638e-07 7.780662e-05 7.227296e-03 5.082161e-04 5.634636e-02
## 97 98 99 100
## 1.248014e-02 4.036974e-03 3.380502e-02 1.064421e-02
which(cooks.distance(mod_bf_trans) >= 1)
## named integer(0)
plot(cooks.distance(mod_bf_trans), type="h")
TukeyHSD(aov(Butterfat ~ Breed, data = butterfat))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Butterfat ~ Breed, data = butterfat)
##
## $Breed
## diff lwr upr p adj
## Canadian-Ayrshire 0.3785 0.01348598 0.74351402 0.0381804
## Guernsey-Ayrshire 0.8900 0.52498598 1.25501402 0.0000000
## Holstein-Fresian-Ayrshire -0.3905 -0.75551402 -0.02548598 0.0297910
## Jersey-Ayrshire 1.2325 0.86748598 1.59751402 0.0000000
## Guernsey-Canadian 0.5115 0.14648598 0.87651402 0.0016669
## Holstein-Fresian-Canadian -0.7690 -1.13401402 -0.40398598 0.0000007
## Jersey-Canadian 0.8540 0.48898598 1.21901402 0.0000000
## Holstein-Fresian-Guernsey -1.2805 -1.64551402 -0.91548598 0.0000000
## Jersey-Guernsey 0.3425 -0.02251402 0.70751402 0.0767002
## Jersey-Holstein-Fresian 1.6230 1.25798598 1.98801402 0.0000000
with(butterfat, tapply(Butterfat, Breed, mean))
## Ayrshire Canadian Guernsey Holstein-Fresian
## 4.0600 4.4385 4.9500 3.6695
## Jersey
## 5.2925
TukeyHSD(aov(Butterfat ~ Age, data = butterfat))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Butterfat ~ Age, data = butterfat)
##
## $Age
## diff lwr upr p adj
## Mature-2year 0.1046 -0.1800704 0.3892704 0.4676321
with(butterfat, tapply(Butterfat, Age, mean))
## 2year Mature
## 4.4298 4.5344
three categorical variabls
eggs
## Fat Lab Technician Sample
## 1 0.62 I one G
## 2 0.55 I one G
## 3 0.34 I one H
## 4 0.24 I one H
## 5 0.80 I two G
## 6 0.68 I two G
## 7 0.76 I two H
## 8 0.65 I two H
## 9 0.30 II one G
## 10 0.40 II one G
## 11 0.33 II one H
## 12 0.43 II one H
## 13 0.39 II two G
## 14 0.40 II two G
## 15 0.29 II two H
## 16 0.18 II two H
## 17 0.46 III one G
## 18 0.38 III one G
## 19 0.27 III one H
## 20 0.37 III one H
## 21 0.37 III two G
## 22 0.42 III two G
## 23 0.45 III two H
## 24 0.54 III two H
## 25 0.18 IV one G
## 26 0.47 IV one G
## 27 0.53 IV one H
## 28 0.32 IV one H
## 29 0.40 IV two G
## 30 0.37 IV two G
## 31 0.31 IV two H
## 32 0.43 IV two H
## 33 0.35 V one G
## 34 0.39 V one G
## 35 0.37 V one H
## 36 0.33 V one H
## 37 0.42 V two G
## 38 0.36 V two G
## 39 0.20 V two H
## 40 0.41 V two H
## 41 0.37 VI one G
## 42 0.43 VI one G
## 43 0.28 VI one H
## 44 0.36 VI one H
## 45 0.18 VI two G
## 46 0.20 VI two G
## 47 0.26 VI two H
## 48 0.06 VI two H
mod_eggs <- lm(Fat ~., data = eggs)
summary(mod_eggs)
##
## Call:
## lm(formula = Fat ~ ., data = eggs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.30583 -0.04656 0.01312 0.06656 0.19500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.59500 0.04773 12.467 2.33e-15 ***
## LabII -0.24000 0.05845 -4.106 0.000193 ***
## LabIII -0.17250 0.05845 -2.951 0.005273 **
## LabIV -0.20375 0.05845 -3.486 0.001206 **
## LabV -0.22625 0.05845 -3.871 0.000392 ***
## LabVI -0.31250 0.05845 -5.346 3.91e-06 ***
## Techniciantwo 0.01917 0.03375 0.568 0.573244
## SampleH -0.04917 0.03375 -1.457 0.152947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1169 on 40 degrees of freedom
## Multiple R-squared: 0.4657, Adjusted R-squared: 0.3722
## F-statistic: 4.98 on 7 and 40 DF, p-value: 0.0004051
par(mfrow = c(2, 2))
plot(mod_eggs)#plot[4] worth paying attention (seperated into groups but seems have some violations)
#test interaction
mod_eggs1 <- lm(Fat ~ Lab * Technician, data = eggs)
mod_eggsa<- lm(Fat ~ Lab + Technician, data = eggs)
anova(mod_eggs1, mod_eggsa) #Interaction between Lab & Technician
## Analysis of Variance Table
##
## Model 1: Fat ~ Lab * Technician
## Model 2: Fat ~ Lab + Technician
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 36 0.33260
## 2 41 0.57567 -5 -0.24307 5.2618 0.0009971 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod_eggs2 <- lm(Fat ~ Lab * Sample, data = eggs)
mod_eggsb<- lm(Fat ~ Lab + Sample, data = eggs)
anova(mod_eggs2, mod_eggsb) #No interaction
## Analysis of Variance Table
##
## Model 1: Fat ~ Lab * Sample
## Model 2: Fat ~ Lab + Sample
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 36 0.50200
## 2 41 0.55107 -5 -0.049067 0.7037 0.6243
mod_eggs3 <- lm(Fat ~ Technician * Sample, data = eggs)
mod_eggsc<- lm(Fat ~ Technician + Sample, data = eggs)
anova(mod_eggs3, mod_eggsc) #No interaction
## Analysis of Variance Table
##
## Model 1: Fat ~ Technician * Sample
## Model 2: Fat ~ Technician + Sample
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 44 0.98805
## 2 45 0.98968 -1 -0.0016333 0.0727 0.7887
mod_egg_int <- lm(Fat ~ Technician + Sample + Lab + Lab * Technician, data = eggs)
par(mfrow = c(2, 2))
plot(mod_egg_int) #Many issues for this one. I can research on why the issues happane and how can we possibly solve it
boxcox(mod_egg_int) #No transformation needed
All the three datasets only contain categorical variabls which many have some limitations.